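/**
 * Crawler utility module: helpers that scrape movie metadata, meme images
 * and music files, and save the results under ./files/download/.
 */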

let axios = require("axios");
let request = require("request");
let fsUtil = require("./lw-fs")
let cheerio = require("cheerio")
let path = require("path")
let fs = require("fs")


// console.log(axios);
// console.log(request);


// let url = "https://vip.1905.com/list/p1o6.shtml"
// request.get(url, function(err, response, body) {
//     console.log(response);
// });
// axios.get(url).then(function(response) {
//     console.log(response);
// })

/**
 * Issue a GET through the `request` library and expose the outcome as a Promise.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: object, body: *}>} Resolves with the response
 *   object and body passed to the `request` callback; rejects with the error
 *   that callback receives.
 */
function getResponse(url) {
    const executor = (resolve, reject) => {
        const onComplete = (err, response, body) => {
            if (!err) {
                resolve({ response, body });
                return;
            }
            reject(err);
        };

        const pending = request.get(url, onComplete);
        // This listener only logs; settling the promise is left to the callback above.
        pending.on("error", (e) => {
            console.log(`获取数据失败: ${e.message}`);
        });
    };

    return new Promise(executor);
}

/**
 * Crawl the movie site's start page, extract the "by genre" (按类型) category
 * links, create one download directory per genre and crawl each genre page
 * through getMovieUrlsByClass.
 *
 * The "全部" (all) link is skipped. Request failures, a non-200 status or an
 * unexpected page layout are logged and end the crawl without throwing.
 *
 * @returns {Promise<void>} Settles after every started genre crawl has settled.
 */
async function getMovieClassUrls() {
    const siteRoot = "https://vip.1905.com/";
    const url = "https://vip.1905.com/list/p1o6.shtml";

    let page;
    try {
        page = await getResponse(url);
    } catch (error) {
        // Without this early return the destructuring below would throw on undefined.
        console.log('caught', error);
        return;
    }

    const { response, body } = page;
    if (response.statusCode !== 200) {
        console.log(`页面请求返回状态消息：${response.statusMessage}, 消息体：${body}`);
        return;
    }

    // Isolate the HTML between the "按类型" label and the following "按年代" label.
    const sectionReg = /<li class="clearfix_smile"><div class="fl clr6 label">按类型<\/div>(.*?)<\/div><\/li><li class="clearfix_smile"><div class="fl clr6 label">按年代/is;
    const section = sectionReg.exec(body);
    if (!section) {
        console.log("未解析到按类型的分类内容");
        return;
    }

    // Each genre link looks like: <a href="/list/t_16/p1o6.shtml" >武侠</a>
    const linkReg = /<a href="(.*?)" >(.*?)<\/a>/igs;
    const crawls = [];
    let match;
    while ((match = linkReg.exec(section[1])) !== null) {
        const href = match[1];
        const movieClass = match[2];
        if (movieClass === "全部") {
            continue;
        }

        // Drop leading slashes so the joined URL has no "//" after the host.
        const classUrl = siteRoot + href.replace(/^\/+/, "");

        // The genre directory must exist before any movie file is written into it.
        await fsUtil.fsMkdir(`./files/download/movies/${movieClass}`);

        crawls.push(
            getMovieUrlsByClass(classUrl, movieClass).catch((error) => {
                console.log('caught', error);
            })
        );
    }

    await Promise.all(crawls);
}

/**
 * Fetch one genre listing page, extract every movie link on it and crawl each
 * movie through getMovieInfoByUrl.
 *
 * A failed request is logged and ends the call without throwing. When the page
 * contains no movie links, an "empty data" message is logged.
 *
 * @param {string} url - Address of the genre listing page.
 * @param {string} movieClass - Genre name, forwarded to getMovieInfoByUrl.
 * @returns {Promise<void>} Settles after every started movie crawl has settled.
 */
async function getMovieUrlsByClass(url, movieClass) {
    let page;
    try {
        page = await getResponse(url);
    } catch (error) {
        // Without this early return the destructuring below would throw on undefined.
        console.log('caught', error);
        return;
    }
    const { body } = page;

    // Capture 1 is the href, capture 2 the title attribute of each movie link.
    const movieLinkReg = /<a class="img" href="(.*?)" target="_blank" title="(.*?)".*?<\/a>/igs;
    const crawls = [];
    let match;
    while ((match = movieLinkReg.exec(body)) !== null) {
        // The code prefixes "https:", so hrefs are presumably protocol-relative ("//host/path").
        const movieUrl = "https:" + match[1];
        crawls.push(
            getMovieInfoByUrl(movieUrl, movieClass).catch((error) => {
                console.log('caught', error);
            })
        );
    }

    // Checked after the loop: a check inside the loop body can never see a null match.
    if (crawls.length === 0) {
        console.log("通过分类" + movieClass + "地址获取分类下的电影资源地址返回空数据");
    }

    await Promise.all(crawls);
}

/**
 * Fetch a movie detail page, extract its metadata and store it as a JSON file
 * at ./files/download/movies/<movieClass>/<title>.json.
 *
 * A failed request or an unrecognised page layout is logged and ends the call
 * without throwing.
 *
 * @param {string} url - Address of the movie detail page.
 * @param {string} movieClass - Genre name; recorded in the JSON and used as the directory name.
 * @returns {Promise<void>}
 */
async function getMovieInfoByUrl(url, movieClass) {
    let page;
    try {
        page = await getResponse(url);
    } catch (error) {
        // Without this early return the destructuring below would throw on undefined.
        console.log('caught', error);
        return;
    }
    const { body } = page;

    // Captures: 2 = title, 3 = year, 4 = director, 5 = cast, 6 = synopsis.
    // The score span accepts any class suffix, not only "score-8".
    // NOTE(review): assumes the score-N class varies per movie — confirm against the live page.
    const infoReg = /<div class="sb-mod-movie(.*?)"><h1 class="movie-title">(.*?)<\/h1><dl class="movie-info"><dd>评分：<span class="movie-score[^"]*"><\/span><\/dd><dd>年份：(.*?)<\/dd><dd>导演：(.*?)<\/dd><dd>主演：(.*?)<\/dd><\/dl><h4>剧情介绍：<\/h4><p class="movie-description">(.*?)<a class="clr0"(.*?)<\/p><\/div>/is;
    const res = infoReg.exec(body);
    if (!res) {
        console.log("获取电影相关信息返回空数据");
        return;
    }

    const title = res[2];
    const obj = {
        电影类型: movieClass,
        电影名称: title,
        电影url: url,
        电影年份: res[3],
        电影导演: res[4],
        电影主演: res[5],
        剧情介绍: res[6],
    };
    const info = JSON.stringify(obj);
    console.log(info);

    // The title is scraped text: replace characters that are path separators or
    // invalid in file names so the file always lands inside the genre directory.
    const safeName = title.replace(/[\\/:*?"<>|]/g, "_");
    fsUtil.fsWriteFileSync(`./files/download/movies/${movieClass}/${safeName}.json`, info);
}

/**
 * Crawl the meme article list: read the total page count from the pagination
 * bar, then for each requested list page create one directory per article
 * title and download that article's images through parseImgPageInfo.
 *
 * @param {number} [maxPages=1] - Upper bound on the number of list pages to
 *   crawl, starting at page 1. The default keeps the crawl to the first page.
 * @returns {Promise<void>} Settles after every started article download has
 *   settled. Rejects only if the initial list-page request fails.
 */
async function getImgPageList(maxPages = 1) {
    const url = "https://www.doutula.com/article/list/";

    // Read the page count from the second-to-last pagination link.
    const res = await axios.get(url);
    const $ = cheerio.load(res.data);
    const pagerLinks = $(".pagination a.page-link");
    const lastPageText = pagerLinks.eq(pagerLinks.length - 2).text();
    console.log("pageNum: " + lastPageText);

    const totalPages = Number.parseInt(lastPageText, 10);
    const pageCount = Math.min(Number.isNaN(totalPages) ? 1 : totalPages, maxPages);

    // Pages are fetched one at a time; each request uses the loop's own page number.
    for (let page = 1; page <= pageCount; page++) {
        let pageRes;
        try {
            pageRes = await axios.get(url + "?page=" + page);
        } catch (err) {
            console.log(`获取表情列表页失败：page=${page} ${err.message}`);
            continue;
        }

        const $page = cheerio.load(pageRes.data);
        const articles = [];
        $page("#home .col-sm-9>a.list-group-item").each((i, v) => {
            const href = $page(v).attr("href");
            const rawTitle = $page(v).find(".random_title").text();
            // Keep the text before the first digit; fall back to the whole
            // title when it has no digit or starts with one.
            const match = /(.*?)\d/.exec(rawTitle);
            const title = (match && match[1]) || rawTitle;
            articles.push({ href, title });
        });

        await Promise.all(articles.map(async ({ href, title }) => {
            try {
                // The directory must exist before the downloader writes into it.
                await fsUtil.fsMkdir(`./files/download/img/${title}`);
                await parseImgPageInfo(href, title);
            } catch (err) {
                console.log(`处理表情页面失败：${href} ${err.message}`);
            }
        }));
    }
}

/**
 * Fetch one meme article page and save every image in it to
 * ./files/download/img/<title>/<title>-<index><ext>.
 *
 * Failures (page request, image request, file write) are logged and do not
 * throw; one failed image does not stop the others.
 *
 * @param {string} url - Address of the article page.
 * @param {string} title - Article title; used as the directory name and file-name prefix.
 * @returns {Promise<void>} Settles after every image download has finished or failed.
 */
async function parseImgPageInfo(url, title) {
    let res;
    try {
        res = await axios.get(url);
    } catch (err) {
        console.log(`获取表情页面失败：${url} ${err.message}`);
        return;
    }

    const $ = cheerio.load(res.data);
    const imgUrls = [];
    $(".artile_des img").each((i, v) => {
        // Missing src values are pushed too, so file indexes keep following DOM order.
        imgUrls.push($(v).attr("src"));
    });

    // Download one image; the file is opened only after the request has succeeded.
    const saveImage = async (imgUrl, index) => {
        if (!imgUrl) {
            return;
        }
        // Strip any query string or fragment so it does not leak into the extension.
        const extname = path.extname(imgUrl.split(/[?#]/)[0]);
        const filePath = `./files/download/img/${title}/${title}-${index}${extname}`;
        try {
            const imgRes = await axios.get(imgUrl, { responseType: "stream" });
            await new Promise((resolve, reject) => {
                const ws = fs.createWriteStream(filePath);
                ws.on("finish", resolve);
                ws.on("error", reject);
                imgRes.data.on("error", (err) => {
                    ws.destroy();
                    reject(err);
                });
                imgRes.data.pipe(ws);
            });
            console.log("图片加载完成，imaUrl：" + imgUrl);
        } catch (err) {
            console.log(`图片下载失败：${imgUrl} ${err.message}`);
        }
    };

    await Promise.all(imgUrls.map((imgUrl, index) => saveImage(imgUrl, index)));
}

/**
 * Send a GET request through an HTTP proxy and log the response body.
 *
 * @param {string} [url] - Address to fetch; defaults to page 2 of the meme article list.
 * @param {{host: string, port: number}} [proxy] - Proxy passed to axios as its `proxy` option.
 * @returns {Promise<void>} Settles after the body or the failure has been
 *   logged; a failed request is logged rather than rejected.
 */
function requestWithProxy(
    url = "https://www.doutula.com/article/list/?page=2",
    proxy = { host: "123.169.124.72", port: 9999 }
) {
    const options = { proxy };
    return axios.get(url, options)
        .then((res) => {
            console.log(res.data);
        })
        .catch((err) => {
            // Handle the failure here so it does not become an unhandled rejection.
            console.log(`代理请求失败：${err.message}`);
        });
}

/**
 * Fetch one page of the daily recommended songs, write a
 * "title,url,fileName" line per song to ./files/download/music/music.txt
 * (overwriting the file) and download every song through downloadMusic.
 *
 * @param {number} [page=2] - Page number sent to the recommendation API.
 * @returns {Promise<void>} Settles after the list file is written and every
 *   download call has settled. Rejects if the API request itself fails.
 */
async function getMusicPageList(page = 2) {
    const url = "http://www.app-echo.com/api/recommend/sound-day?page=" + page;
    const res = await axios.get(url);
    const list = res.data && Array.isArray(res.data.list) ? res.data.list : [];

    // The directory must exist before the list file or any song is written into it.
    await fsUtil.fsMkdir(`./files/download/music/`);

    const songs = list
        .filter((item) => item && item.sound && item.sound.source)
        .map((item) => {
            const title = item.sound.name;
            const mp3Url = item.sound.source;
            const fileName = path.parse(mp3Url).name;
            return { title, mp3Url, fileName };
        });

    // One overwrite with the full content: separate truncate and append calls
    // would run concurrently and could land in any order.
    const content = songs
        .map(({ title, mp3Url, fileName }) => `${title},${mp3Url},${fileName}\n`)
        .join("");
    try {
        await fs.promises.writeFile("./files/download/music/music.txt", content, { flag: "w" });
        songs.forEach(({ title }) => {
            console.log("写入列表文件完成：" + title);
        });
    } catch (err) {
        console.log(`写入列表文件失败：${err.message}`);
    }

    await Promise.all(songs.map(({ mp3Url, fileName }) => downloadMusic(mp3Url, fileName)));
}

/**
 * Download one song to ./files/download/music/<fileName>.mp3.
 *
 * The output file is opened only after the request has succeeded, so a failed
 * request leaves no empty file or open stream behind. Failures are logged and
 * do not throw.
 *
 * @param {string} mp3Url - Address of the audio file.
 * @param {string} fileName - Base name (without extension) of the local file.
 * @returns {Promise<void>} Settles once the file is fully written or the failure is logged.
 */
async function downloadMusic(mp3Url, fileName) {
    try {
        const resStream = await axios.get(mp3Url, { responseType: "stream" });
        await new Promise((resolve, reject) => {
            const ws = fs.createWriteStream(`./files/download/music/${fileName}.mp3`);
            // "finish" fires once all piped data has been flushed to the file.
            ws.on("finish", resolve);
            ws.on("error", reject);
            resStream.data.on("error", (err) => {
                ws.destroy();
                reject(err);
            });
            resStream.data.pipe(ws);
        });
        console.log("下载成功：", mp3Url);
    } catch (err) {
        console.log("下载失败：", mp3Url, err.message);
    }
}

/**
 * Public API of this module.
 */
Object.assign(exports, {
    // Movie crawling
    getMovieClassUrls,
    getMovieUrlsByClass,
    getMovieInfoByUrl,
    // Meme image crawling
    getImgPageList,
    parseImgPageInfo,
    // Request through a proxy IP
    requestWithProxy,
    // Fetch a page of songs and download them
    getMusicPageList,
});
